Assignment

A short description of the post.

Mayurapriyann Arulmozhi Baskaran https://www.linkedin.com/in/mayurapriyann/
07-14-2021

Installing and Launching R Packages

# Packages used throughout this post, in the order they are attached
# (later packages mask same-named functions from earlier ones).
packages <- c(
  "raster", "sf", "tmap", "tidyverse", "clock", "rgdal", "tidytext", "widyr",
  "wordcloud", "DT", "ggwordcloud", "textplot", "lubridate", "dplyr", "hms",
  "tidygraph", "ggraph", "igraph", "wordcloud2", "stringi", "mapview",
  "data.table", "crosstalk"
)

for (p in packages) {
  # Install only when the package is not available, then attach it.
  # requireNamespace() tests availability without attaching, and library()
  # fails loudly if the installation did not succeed.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

Read credit card details

# Load the credit card transactions, reading text as UTF-8.
cc <- read.csv("data/cc_data.csv", encoding = "UTF-8")

Read loyalty card details

# Load the loyalty card transactions, reading text as UTF-8.
lc <- read.csv("data/loyalty_data.csv", encoding = "UTF-8")

Get location frequency for credit cards

# Number of credit card transactions per location, most frequent first.
wc1 <- count(cc, location, sort = TRUE)

Get location frequency for loyalty cards

# Number of loyalty card transactions per location, most frequent first.
wc2 <- count(lc, location, sort = TRUE)

Basic word cloud for credit card transactions

# Word cloud of locations sized by credit card transaction count.
wordcloud(
  words = wc1$location,
  freq = wc1$n,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0,
  scale = c(2.5, 0.5),
  colors = brewer.pal(8, "Dark2"),
  vfont = c("sans serif", "plain"),
  font = 5
)

Basic word cloud for loyalty card transactions

# Word cloud of locations sized by loyalty card transaction count.
wordcloud(
  words = wc2$location,
  freq = wc2$n,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0,
  scale = c(2.5, 0.5),
  colors = brewer.pal(8, "Dark2"),
  vfont = c("sans serif", "plain"),
  font = 5
)

Least visited locations based on credit card (cc) transactions

# wc1 is sorted by descending count, so its last rows are the least visited.
tail(wc1, n = 6L)
                    location n
29    Maximum Iron and Steel 6
30           Abila Scrapyard 4
31 Octavio's Office Supplies 4
32              Frank's Fuel 2
33                    U-Pump 2
34               Daily Dealz 1

Least visited locations based on loyalty card (lc) transactions

# wc2 is sorted by descending count, so its last rows are the least visited.
tail(wc2, n = 6L)
                    location n
28    Maximum Iron and Steel 6
29          Roberts and Sons 6
30           Abila Scrapyard 4
31 Octavio's Office Supplies 3
32              Frank's Fuel 2
33                    U-Pump 1

Split time from date

# Split the timestamp column at the space into separate date and time columns.
cc <- separate(cc, timestamp, into = c("date", "time"), sep = " ")

Find the day of the week

# Weekday name for each transaction. The date strings are month/day/year, so
# the format must be given explicitly: without it as.Date() tries its default
# year-first formats and misreads the values, giving wrong or NA weekdays.
cc$day <- weekdays(as.Date(cc$date, format = "%m/%d/%Y"))

Convert to date format and time format

# Parse the month/day/year strings as Date, and reduce each hour:minute
# time string to its hour component.
cc$date <- as.Date(cc$date, format = "%m/%d/%Y")
cc$hour <- hour(as_hms(as.POSIXct(cc$time, format = "%H:%M")))

Generate heatmap - Location vs hour of day

# Heatmap of credit card transaction counts by location and hour of day.
# Location names are transliterated to ASCII first.
cc$location <- stri_trans_general(cc$location, "latin-ascii")
# One axis tick per hour between the earliest and latest observed hour.
x_axis_labels <- min(cc[, "hour"]):max(cc[, "hour"])
cc %>%
  group_by(hour, location) %>%
  summarize(n = n()) %>%
  ggplot(aes(hour, location, fill = n)) +
  geom_tile() +
  scale_x_continuous(
    expand = c(0, 0),
    labels = x_axis_labels,
    breaks = x_axis_labels
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  # Fill scale is limited to 0-10; na.value supplies the colour for tiles
  # without a value on that scale.
  scale_fill_distiller(
    palette = "Reds",
    limits = c(0, 10),
    na.value = "#de2d26",
    direction = 1,
    labels = c(0.0, 2.5, 5.0, 7.5, "> 10.0")
  ) +
  labs(y = "Location", x = "Hour")

# Heatmap of credit card transaction counts by location and day of week.
# Location names are transliterated to ASCII first.
cc$location <- stri_trans_general(cc$location, "latin-ascii")
group_by(cc, day, location) %>%
  summarize(n = n()) %>%
  ggplot(aes(
    # Explicit levels put the x axis in calendar order (Monday..Sunday)
    # instead of alphabetical order. The argument is spelled out in full
    # rather than relying on partial matching of `level`.
    x = factor(day, levels = c("Monday", "Tuesday", "Wednesday", "Thursday",
                               "Friday", "Saturday", "Sunday")),
    location,
    fill = n
  )) +
  geom_tile() +
  # Fill scale is limited to 0-10; na.value supplies the colour for tiles
  # without a value on that scale.
  scale_fill_distiller(palette = "Reds", limits = c(0, 10), na.value = "#de2d26",
                       direction = 1, labels = c(0.0, 2.5, 5.0, 7.5, "> 10.0")) +
  labs(y = "Location", x = "Day") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
        panel.grid.major = element_blank())

# Heatmap of credit card transaction counts by location and calendar date.
# Location names are transliterated to ASCII first.
cc$location <- stri_trans_general(cc$location, "latin-ascii")
cc %>%
  group_by(date, location) %>%
  summarize(n = n()) %>%
  # Dates are converted to a factor so each date gets its own discrete column.
  ggplot(aes(x = factor(date), location, fill = n)) +
  geom_tile() +
  scale_fill_distiller(
    palette = "Reds",
    limits = c(0, 10),
    na.value = "#de2d26",
    direction = 1,
    labels = c(0.0, 2.5, 5.0, 7.5, "> 10.0")
  ) +
  labs(y = "Location", x = "Date") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.grid.major = element_blank()
  )

# Transaction counts per day of week, location and hour, listed by day with
# the highest counts first, shown as an interactive table.
popular_loc <- cc %>%
  group_by(day, location, hour) %>%
  summarise(n = n()) %>%
  arrange(day, desc(n))
DT::datatable(popular_loc)

Importing raster file

# Load the tourist map image as a raster object and print its summary.
bgmap <- raster(x = "data/MC2-tourist.tif")
bgmap
class      : RasterLayer 
band       : 1  (of  3  bands)
dimensions : 1595, 2706, 4316070  (nrow, ncol, ncell)
resolution : 3.16216e-05, 3.16216e-05  (x, y)
extent     : 24.82419, 24.90976, 36.04499, 36.09543  (xmin, xmax, ymin, ymax)
crs        : +proj=longlat +datum=WGS84 +no_defs 
source     : MC2-tourist.tif 
names      : MC2.tourist 
values     : 0, 255  (min, max)

Plotting Raster Layer

# Draw the background map as an RGB image built from bands 1, 2 and 3.
tm_shape(bgmap) +
  tm_rgb(
    bgmap,
    r = 1, g = 2, b = 3,
    alpha = NA,
    saturation = 1,
    interpolate = TRUE,
    max.value = 255
  )

Importing vector GIS data file

# Read the "Abila" layer from the Geospatial data folder.
Abila_st <- st_read(
  dsn = "data/Geospatial",
  layer = "Abila"
)
Reading layer `Abila' from data source 
  `C:\abmayur05\VisualAnalytics\_posts\2021-07-14-assignment\data\Geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 3290 features and 9 fields
Geometry type: LINESTRING
Dimension:     XY
Bounding box:  xmin: 24.82401 ymin: 36.04502 xmax: 24.90997 ymax: 36.09492
Geodetic CRS:  WGS 84

Importing Aspatial Data

# Load the GPS tracking records and inspect the column types.
gps <- read_csv(file = "data/gps.csv")
glimpse(gps)
Rows: 685,169
Columns: 4
$ Timestamp <chr> "01/06/2014 06:28:01", "01/06/2014 06:28:01", "01/~
$ id        <dbl> 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35~
$ lat       <dbl> 36.07623, 36.07622, 36.07621, 36.07622, 36.07621, ~
$ long      <dbl> 24.87469, 24.87460, 24.87444, 24.87425, 24.87417, ~

Converting Date-Time field and ID field

# Parse the month/day/year hour:minute:second strings into date-times, and
# treat id as a categorical label rather than a number.
gps$Timestamp <- date_time_parse(
  gps$Timestamp,
  zone = "",
  format = "%m/%d/%Y %H:%M:%S"
)
gps$id <- as_factor(gps$id)

Converting Aspatial Data Into A Simple Feature Data Frame

# Build point geometries from the long/lat columns using EPSG:4326.
gps_sf <- st_as_sf(gps, coords = c("long", "lat"), crs = 4326)

Separate date and hour

# Keep a copy of the full timestamp, because separate() replaces the
# Timestamp column with its date and time parts.
gps_sf$timestamp <- gps_sf$Timestamp
gps_sf <- separate(gps_sf, Timestamp, into = c("date", "time"), sep = " ")
# Derive weekday name, Date, and hour of day from the split parts.
gps_sf$day <- weekdays(as.Date(gps_sf$date))
gps_sf$date <- as.Date(gps_sf$date, format = "%Y-%m-%d")
gps_sf$hour <- hour(as_hms(as.POSIXct(gps_sf$time, format = "%H:%M")))

Creating Movement Path From GPS Points

# Collapse the GPS points of each id/date/hour group into one feature
# (do_union = FALSE, so the points are combined rather than unioned),
# then cast each feature to a line.
gps_path <- gps_sf %>%
  group_by(id, date, hour) %>%
  summarize(m = mean(timestamp), do_union = FALSE) %>%
  st_cast("LINESTRING")

Finding the orphan lines

# Count the points in each path and attach the count as column p.
p <- npts(gps_path, by_feature = TRUE)
gps_path2 <- cbind(gps_path, p)

Removing the orphan lines

# Drop the paths that consist of a single point.
gps_path3 <- gps_path2[gps_path2$p != 1, ]

Plotting the GPS Paths

# Paths recorded for id 1 only.
gps_path_selected <- filter(gps_path3, id == 1)

# Switch tmap to interactive viewing.
tmap_mode("view")

# Map 1: paths of id 1 over the background map.
tm_shape(bgmap) +
  tm_rgb(
    bgmap,
    r = 1, g = 2, b = 3,
    alpha = NA,
    saturation = 1,
    interpolate = TRUE,
    max.value = 255
  ) +
  tm_shape(gps_path_selected) +
  tm_lines()

# Map 2: all remaining paths over the background map.
tm_shape(bgmap) +
  tm_rgb(
    bgmap,
    r = 1, g = 2, b = 3,
    alpha = NA,
    saturation = 1,
    interpolate = TRUE,
    max.value = 255
  ) +
  tm_shape(gps_path3) +
  tm_lines()